rm(list = ls())


library(dplyr)
library(survminer)
library(survival)
library(stargazer)


setwd("C:/Users/ym1293/Desktop/Magiya_et_al_Data_and_analysis")

library(readxl)
vali<-read_xlsx("Governorships_Ottoman_Final_03_01.xlsx", sheet = 1, guess_max = 10000)


vali$tenure<-vali$Tenure2..months...August.1876...October.30..1918.#variable for tenure length


#######################################################
#To get the start date
vali$start.date<-as.character(vali$Start)
start_char<-strsplit(vali$start.date, "/")
start_char_mat<-matrix(unlist(start_char), ncol=3, byrow=TRUE)
vali$start.year<-as.integer(start_char_mat[,3])
vali$start.month<-as.integer(start_char_mat[,1])
#######################################################



#######################################################
#Recoding Variables
vali$Population<-vali$Population/100000 #Population variable divided to 100 thousand: per 100 thousand people
vali$Distance<-vali$Distance/10^3 # Distance: per kilometer

hist(vali$Distance)
hist(log(vali$Distance))

hist(vali$Population)
hist(log(vali$Population))

vali$log.population<-log(vali$Population+1)#log transformed due to skew
vali$log.distance<-log(vali$Distance+1)# log transformed due to skew

vali$Not_right_censored<-(vali$Right_Censored * -1) +1# Surv takes 0 as right-censored so needed to change


vali <- vali %>% mutate(before = ifelse(start.year<1876 | 
                                          (start.year==1876& start.month<9)
                                        , 1, 0)) # Assigns 1 to those who started tenure before Abdulhamid II's reign
vali$before[(vali$start.year<=1909 & vali$start.month<=4) & (vali$start.year>=1909 & vali$start.month>4)]<-1 #Assigns 1 to those who started tenure before CUP
#######################################################


####################################
#Mean standardizing the ELF variable for the whole sample
mean.elf.all<-mean(vali$Ethnic.Fractionalization,na.rm=T)
sd.elf.all<-sd(vali$Ethnic.Fractionalization,na.rm=T)
vali$elf_std<-(vali$Ethnic.Fractionalization-mean.elf.all)/sd.elf.all
##############################


##########################################
#Getting the subset for Abdulhamid era
abdulhamid_era<-subset(vali,vali$Reign=="Abdulhamid")
abdulhamid_era<-subset(abdulhamid_era,abdulhamid_era$Right_Censored!="NA")
abdulhamid_era<-subset(abdulhamid_era,abdulhamid_era$tenure!="NA")
##########################################


##########################################
#Mean standardizing the ELF variable for the Whole Sample
mean.elf<-mean(abdulhamid_era$Ethnic.Fractionalization,na.rm = T)
sd.elf<-sd(abdulhamid_era$Ethnic.Fractionalization,na.rm = T)
abdulhamid_era$elf_std<-(abdulhamid_era$Ethnic.Fractionalization-mean.elf)/sd.elf
##########################################



################################################################
################################################################
################################################################
#####Producing the Model in Table 2#############################
################################################################
################################################################
################################################################


abdulhamid.ethnic<- coxph(Surv(tenure, Not_right_censored) ~
                            elf_std
                             + before
                             + Railroad 
                             +Slope
                             +Border
                          +Sea
                          +log.population 
                          +log.distance
                             + cluster(Province)
                             , data = abdulhamid_era)
summary(abdulhamid.ethnic)




list_means_abdulhamid<-list(c("Dependent Variable Mean", round(mean(abdulhamid_era$tenure, na.rm=T), digits = 2)))#Calculates the dependent variable's mean
list_sd_abdulhamid<-list(c("Dependent Variable SD", round(sd(abdulhamid_era$tenure, na.rm=T), digits = 2)))#Calculates the dependent variable's standard deviation



#Producing Table 2 with Hazard Ratios
stargazer(
  abdulhamid.ethnic,
  
  out="Hazard Ratios-Abdulhamid-Main.html",
  title  = "Cox Proportional Hazards Model", apply.coef = exp,  t.auto=F, 
  p.auto=F, report = "vct*",

  covariate.labels = c("Ethnolinguistic Fractionalization (1 sd)", 
                       "Governor Assigned Before Rule",
                       "Railroad Dummy",
                       "Avg. Slope (degrees)",
                       "Land Border Dummy",
                       "Sea Opening Dummy",
                       "Log of Population (100 thousand)",
                       "Log of Distance from Istanbul (km)"
             
  ),
  dep.var.caption  = "Dependent Variable:",
  dep.var.labels   = "Governor Tenure Length (Months)",
  add.lines=
    c(list_means_abdulhamid,
      list_sd_abdulhamid),
  no.space=TRUE
)






################################################################
################################################################
################################################################
#####Producing the Model in Table A.3###########################
################################################################
################################################################
################################################################


ottoman.ethnic<- coxph(Surv(tenure, Not_right_censored) ~
                         elf_std
                       + before
                       + Railroad 
                       +Slope
                       +Border+Sea
                       +log.population 
                       +log.distance
                       + cluster(Province)
                       , data = vali)
summary(ottoman.ethnic)



list_means_ottoman<-list(c("Dependent Variable Mean", round(mean(vali$tenure, na.rm=T), digits = 2)))#Calculates the dependent variable's mean
list_sd_ottoman<-list(c("Dependent Variable SD", round(sd(vali$tenure, na.rm=T), digits = 2)))#Calculates the dependent variable's standard deviation


#Producing Table A.3 with Hazard Ratios
stargazer(
  ottoman.ethnic,
  out="Hazard Ratios-Ottoman-Main.html",
  title  = "Cox Proportional Hazards Model", apply.coef = exp,  t.auto=F, 
  p.auto=F, report = "vct*",
  covariate.labels = c("Ethnolinguistic Fractionalization (1 sd)", 
                       "Governor Assigned Before Rule",
                       "Railroad Dummy",
                       "Avg. Slope (degrees)",
                       "Land Border Dummy",
                       "Sea Opening Dummy",
                       "Log of Population (100 thousand)",
                       "Log of Distance from Istanbul (km)"
  ),
  dep.var.caption  = "Dependent Variable:",
  dep.var.labels   = "Governor Tenure Length (Months)",
  add.lines=
    c(list_means_abdulhamid,
      list_sd_abdulhamid),
  no.space=TRUE

)


